pkgs<-c("here","tidyverse","tidymodels","forcats","viridis","patchwork","tibble", "minpack.lm","stringr","ggsidekick","conflicted","ncdf4","reshape2","pracma","rnoaa","Hmisc","mgcv","RColorBrewer","nls.multstart","rTPC", "lubridate") 
## minpack.lm needed if using nlsLM()
if(length(setdiff(pkgs,rownames(installed.packages())))>0){ install.packages(setdiff(pkgs,rownames(installed.packages())),dependencies=T)}
invisible(lapply(pkgs,library,character.only=T))
# devtools::install_github("seananderson/ggsidekick") ## not on CRAN 
# remotes::install_github("padpadpadpad/rTPC") ## not on CRAN for this R version
tidymodels_prefer(quiet=TRUE)
conflict_prefer("lag","dplyr")
conflict_prefer("summarize", "dplyr")

theme_set(theme_sleek())

home <- here::here()
fxn <- list.files(paste0(home,"/R/functions"))
invisible(sapply(FUN=source,paste0(home,"/R/functions/",fxn)))

Read and filter back-calculated length data


data <- readr::read_csv(paste0(home,"/data/for_analysis/dat.csv")) %>% select(-...1)

## use only length-at-age by filtering on age_ring
d <- data %>% filter(age_ring == "Y")

## sample size by area and cohort
ns<- d %>% 
  group_by(cohort,area) %>% 
  summarise(n=n())

## minimum number of observations per area and cohort
## d %>% group_by(area,cohort) %>% summarize(n=n()) %>% data.frame()
d$area_cohort <- as.factor(paste(d$area,d$cohort))
d <- d %>%
  group_by(area_cohort) %>% 
  filter(n()>100)

## minimum number of observations per area, cohort, and age
## d %>% group_by(area,cohort,age_bc) %>% summarize(n=n()) %>% data.frame()
d$area_cohort_age <- as.factor(paste(d$area,d$cohort,d$age_bc))
d <- d %>%
  group_by(area_cohort_age) %>% 
  filter(n()>10)

## minimum number of observations per gear
##  d %>% group_by(gear) %>% summarize(n=n()) %>% data.frame()
# d$gear<-gsub("K0","",d$gear)
# d <- d %>%
#   group_by(gear) %>% 
#   filter(n()>2000)

## minimum number of cohorts in a given area
cnt <- d %>%
  group_by(area) %>%
  summarise(n=n_distinct(cohort)) %>%
  filter(n>=10)
d <- d[d$area %in% cnt$area,]

## plot cleaned data
ggplot(d, aes(age_bc,length_mm,color=area)) +
  geom_jitter(size=0.1,height=0,alpha=0.1) +
  scale_x_continuous(breaks=seq(20)) +
  theme(axis.text.x=element_text(angle=0)) +
  theme(axis.text=element_text(size=12),axis.title=element_text(size=15)) +
  labs(x="Age",y="Length (mm)") +
  guides(color="none") + 
  facet_wrap(~area,scale="free_x")


## longitude and latitude attributes for each area
area <- c("BS","BT","FB","FM","HO","JM","MU","RA","SI_EK","SI_HA","TH","VN")
nareas<-length(area)
lat <- c(60,60.4,60.3,60.5,63.7,58,59,65.9,57.3,57.4,56.1,57.5)
lon <- c(21.5,18.1,19.5,18,20.9,16.8,18.1,22.3,16.6,16.7,15.9,16.9)
area_attr<-data.frame(cbind(area=area,lat=lat,lon=lon)) %>%
  mutate_at(c("lat","lon"),as.numeric)

Load ERSST data


## download ERSST data - only run when update needed
# sst_dat_all <- get_ersst_data(years=seq(1940,2022),data.dir=paste0(home,"/data"), latrange=c(55,66),lonrange=c(15,23))

## SST based on ERSST data with relatively low spatial resolution (2x2 degrees)
## need to cover at least 2x2 grid area with even numbers for longitude/latitude
sst_areas<-NULL
lat_ranges<-lon_ranges<-list() ## for testing only
for(a in 1:nareas) {
  lat_range <- c(2*floor(area_attr$lat[a]/2),2*floor(area_attr$lat[a]/2)+2)
  lon_range <- c(2*floor(area_attr$lon[a]/2),2*floor(area_attr$lon[a]/2)+2)
  sst_area <- load_ersst_data(years=c(1940,2022),data.dir=paste0(home,"/data"), ncfilename="sst.mnmean.nc",latrange=lat_range,lonrange=lon_range)
  sst_area$area<-area_attr$area[a]
  sst_areas <- bind_rows(sst_areas,sst_area)
  lat_ranges[[a]] <- lat_range
  lon_ranges[[a]] <- lon_range
}
latranges<-data.frame(matrix(unlist(lat_ranges),ncol=2,byrow=T))
lonranges<-data.frame(matrix(unlist(lon_ranges),ncol=2,byrow=T))

## plot SST by area in each month
sst_areas %>%
  ggplot(.,aes(x=year,y=meanSST,group=as.factor(month),color=as.factor(month))) +
  geom_line() +
  scale_color_brewer(palette="Set3") +
  scale_x_continuous(breaks=seq(1940,2020,10)) +
  theme(plot.title=element_text(size=15,face="bold")) +
  theme(axis.text.x=element_text(angle=90)) +
  theme(axis.text=element_text(size=12),axis.title=element_text(size=15)) +
  labs(x="Year",y="Mean SST",title="Mean SST in each month by area") +
  facet_wrap(~area,scale="free_y") +
  NULL


## plot SST by month in each area
sst_areas %>%
  ggplot(.,aes(x=year,y=meanSST,group=as.factor(area),color=as.factor(area))) +
  geom_line() +
  scale_color_brewer(palette="Set3") +
  scale_x_continuous(breaks=seq(1940,2020,10)) +
  theme(plot.title=element_text(size=15,face="bold")) +
  theme(axis.text.x=element_text(angle=90)) +
  theme(axis.text=element_text(size=12),axis.title=element_text(size=15)) +
  labs(x="Year",y="Mean SST",title="Mean SST in each month by area") +
  facet_wrap(~month,scale="free_y") +
  NULL


tab <- sst_areas %>% 
  group_by(area,month) %>%
  summarize(meanSST=mean(meanSST,na.rm=T)) %>%
  pivot_wider(names_from=area,values_from=meanSST,id_cols=month) 

## define seasons 
sst_areas <- sst_areas %>%
  mutate(month=as.numeric(month)) %>%
  mutate(season = case_when(
      month %in%  c(6,7,8,9,10) ~ "warm",
      month %in%  c(11,12,1,2,3,4,5)  ~ "cold")
      )

## mean annual or seasonal (filtered) SST by area
sst_areas_annual <- sst_areas %>%
  group_by(area,year) %>%
  # filter(season=="warm") %>% ## need to filter for specific season!
  summarize(meanSST=mean(meanSST,na.rm=T)) %>% 
  filter(year<2022) ## incomplete 2022 data

## plot SST by area over time
# sst_areas_annual %>%
#   ggplot(.,aes(x=year,y=meanSST,group=area,color=area)) +
#   geom_line() +
#   scale_x_continuous(breaks=seq(1940,2020,10)) +
#   theme(axis.text.x=element_text(angle=90)) +
#   NULL

## calculate SST lags and averages of lags (first few years of life)
sst_areas_lags <- sst_areas_annual %>% 
  mutate(meanSST_yr1=lead(meanSST,1)) %>%
  mutate(meanSST_yr2=lead(meanSST,2)) %>%
  mutate(meanSST_yr3=lead(meanSST,3)) %>%  
  rowwise() %>%
  mutate(meanSST_yr01=mean(c(meanSST,meanSST_yr1),na.rm=T)) %>%
  mutate(meanSST_yr012=mean(c(meanSST,meanSST_yr1,meanSST_yr2),na.rm=T)) %>%
  mutate(meanSST_yr123=mean(c(meanSST_yr1,meanSST_yr2,meanSST_yr3),na.rm=T))

## historical means by area 
sst_areas_wide <- sst_areas_annual %>%
  pivot_wider(names_from=area,values_from=meanSST,id_cols=year) # %>%
  # filter(year<2000)

sst_area_means <- data.frame(area=names(sst_areas_wide)[-1],mean_SST_allyrs=round(colMeans(sst_areas_wide[,-1]),2))

## plot on map
sst_areas_annual

Load gam-predicted temperature data

pred_temp <- read_csv("data/predicted_temp/predicted_temp.csv") %>% dplyr::select(-...1)
#> New names:
#> Rows: 338 Columns: 4
#> ── Column specification
#> ──────────────────────────────────────────────────────── Delimiter: "," chr
#> (1): area dbl (3): ...1, year, mean_temp
#> â„č Use `spec()` to retrieve the full column specification for this data. â„č
#> Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> ‱ `` -> `...1`

ggplot(pred_temp, aes(year, mean_temp)) + 
  facet_wrap(~area) + 
  geom_line()


ggplot(pred_temp, aes(year, mean_temp, color = area)) + 
  geom_line()


# plot temperture range in relation to growth data
d_min <- d %>% 
  group_by(area) %>% 
  summarise(year_min = min(cohort)) %>% 
  ungroup() %>% 
  rename(year = year_min) %>% 
  dplyr::select(area, year)
  
ggplot(pred_temp, aes(year, mean_temp, color = area)) + 
  geom_line() + 
  geom_vline(data = d_min, aes(xintercept = year)) +
  facet_wrap(~area)

Individual growth increments from age a to a+1


## calculate growth increments 
g <- d %>%
  group_by(ID) %>%
  mutate(growth=length_mm-lag(length_mm,default=0)) 

## summarize growth increments by age, area and cohort
gD <- g %>%
  filter(age_bc<7) %>%
  group_by(age_bc,area,cohort) %>%
  summarize(growth_mean=mean(growth,na.rm=T),growth_median=median(growth,na.rm=T), growth_lower=quantile(growth,prob=0.05,na.rm=T),growth_upper=quantile(growth,prob=0.95,na.rm=T)) %>%
  mutate(age_gr=paste0(age_bc-1,"-",age_bc)) %>%
  mutate(year=cohort+age_bc)

## plot growth increments over time to look for trends across ages by area
gD %>%
  ggplot(.,aes(cohort,growth_mean,color=factor(age_gr))) + 
  geom_point(size=0.1,alpha=0.5) + 
  stat_smooth(aes(cohort,growth_mean,group=factor(age_gr))) +
  scale_color_brewer(palette="Paired",name="Age") +
  theme(plot.title=element_text(size=15,face="bold")) +
  theme(axis.text.x=element_text(angle=90)) +
  theme(axis.text=element_text(size=12),axis.title=element_text(size=15)) +
  guides(color=guide_legend(override.aes=list(size=1))) + 
  labs(x="Cohort",y="Mean individual growth increments",title="Growth increments over time by area and age") +
  facet_grid(age_gr~area,scales="free_y") +
  NULL


## plot growth increments over time to look for coherence among areas by age
gD %>%
  ggplot(.,aes(cohort,growth_mean,color=factor(area))) + 
  geom_line(size=0.1) + 
  stat_smooth(aes(cohort,growth_mean,group=factor(area)),size=0.8,se=FALSE,method="gam", formula=y~s(x,k=4)) +
  scale_color_brewer(palette="Paired",name="Area") +
  theme(plot.title=element_text(size=15,face="bold")) +
  theme(axis.text.x=element_text(angle=0)) +
  theme(axis.text=element_text(size=12),axis.title=element_text(size=15)) +
  guides(color=guide_legend(override.aes=list(size=1))) +
  labs(x="Cohort",y="Mean individual growth increments",title="Growth increments over time in each area") +
  facet_wrap(~age_gr,scales="free_y") +
  NULL


## plot growth increments by age as a function of mean SST
# gD %>%
#   left_join(sst_areas_annual, by=c("area","year")) %>%
#   filter(age_bc<7) %>%
#   ggplot(.,aes(meanSST,growth_mean,color=factor(age_gr))) +
#   geom_line(size=0.1) +
#   stat_smooth(aes(meanSST,growth_mean,group=factor(age_gr)),size=0.8,se=FALSE, method="gam", formula=y~s(x,k=4)) +
#   scale_color_brewer(palette="Paired") +
#   theme(plot.title=element_text(size=15,face="bold")) +
#   theme(axis.text.x=element_text(angle=0)) +
#   theme(axis.text=element_text(size=12),axis.title=element_text(size=15)) +
#   guides(color=guide_legend(override.aes=list(size=1))) +
#   labs(x="mean SST",y="Mean individual growth increments",title="Growth increments by age as a function of mean SST in each area") +
#   facet_wrap(~area,scales="free") +
#   NULL

Individual von Bertalanffy or modified Gompertz growth parameter estimates


## get individual growth parameters (functions: VBGF/Gompertz,nls_out,fit_nls)
IVBG <- d %>% 
  group_by(ID) %>% 
  summarize(k=nls_out(fit_nls(length_mm,age_bc,min_nage=4,model="VBGF"))$k,
            k_se=nls_out(fit_nls(length_mm,age_bc,min_nage=4,model="VBGF"))$k_se,
            linf=nls_out(fit_nls(length_mm,age_bc,min_nage=4,model="VBGF"))$linf,
            linf_se=nls_out(fit_nls(length_mm,age_bc,min_nage=4,model="VBGF"))$linf_se)

## model can be 'VBGF' (default) or 'Gompertz'  

drop_na(k_se) # do we want this? needed to calculate quantiles, but if we don’t want to select individuals based on standard errors or quantiles of them we don’t need this

IVBG2 <- IVBG %>%
  mutate(k_lwr = k - 1.96*k_se,
         k_upr = k + 1.96*k_se,
         linf_lwr = linf - 1.96*linf_se,
         linf_upr = linf + 1.96*linf_se,
         row_id = row_number()) %>% 
  drop_na(k_se) # do we want this? needed to calculate quantiles, but if we don't want to select individuals based on standard errors or quantiles of them we don't need this

# IVBG2 <- IVBG %>%
#   tidylog::drop_na(linf_se) %>%
#   tidylog::mutate(keep = ifelse(k_se < quantile(k_se, prob = 0.95), "Y", "N")) %>%
#   tidylog::filter(keep == "Y")

# Plot all K's
IVBG2 %>%
  #filter(row_id < 5000) %>%
  ggplot(aes(row_id, k, ymin = k_lwr, ymax = k_upr)) +
  geom_point(alpha = 0.5) +
  geom_errorbar(alpha = 0.5) +
  NULL


# Plot all L_inf's
IVBG2 %>%
  #filter(row_id < 5000) %>%
  ggplot(aes(row_id, linf, ymin = linf_lwr, ymax = linf_upr)) +
  geom_point(alpha = 0.5) +
  geom_errorbar(alpha = 0.5) +
  NULL


# After some exploring, we though that it's easy to detect which individuals have "nonsense" parameter estimates when looking at L_inf. Take this individual for instance:
t <- IVBG2 %>%
  tidylog::drop_na(linf_se) %>%
  tidylog::mutate(keep = ifelse(linf < linf_se, "N", "Y")) %>%
  filter(ID == "1970_234_FM")
#> drop_na: no rows removed
#> mutate: new variable 'keep' (character) with 2 unique values and 0% NA
t

# Absolutely mad L_inf (and therefore also K due to their correlation), but the model fit in relation to data looks good:
d %>%
  filter(ID == "1970_234_FM") %>%
  tidylog::mutate(length_mm_pred = t$linf*(1-exp(-t$k*age_bc))) %>%
  ggplot(aes(age_bc, length_mm)) +
  geom_point() +
  geom_line(aes(age_bc, length_mm_pred), color = "tomato3", inherit.aes = FALSE)
#> mutate (grouped): new variable 'length_mm_pred' (double) with 5 unique values and 0% NA


IVBG2 %>%
  tidylog::drop_na(linf_se) %>%
  tidylog::mutate(keep = ifelse(k < k_se, "N", "Y")) %>%
  #filter(row_id < 10000) %>%
  ggplot(aes(row_id, k, ymin = k_lwr, ymax = k_upr, color = keep)) +
  geom_point(alpha = 0.5) +
  facet_wrap(~keep, ncol = 1) +
  geom_errorbar(alpha = 0.5) +
  NULL
#> drop_na: no rows removed
#> mutate: new variable 'keep' (character) with 2 unique values and 0% NA


# We can also consider removing the upper 95% quantile of standard errors (not quantile of K directly)
IVBG2 %>%
  tidylog::drop_na(linf_se) %>%
  tidylog::mutate(keep = ifelse(k > quantile(k_se, probs = 0.95), "N", "Y")) %>%
  #filter(row_id < 10000) %>%
  ggplot(aes(row_id, k, ymin = k_lwr, ymax = k_upr, color = keep)) +
  geom_point(alpha = 0.5) +
  facet_wrap(~keep, ncol = 1) +
  geom_errorbar(alpha = 0.5) +
  NULL
#> drop_na: no rows removed
#> mutate: new variable 'keep' (character) with 2 unique values and 0% NA


IVBG2 %>% tidylog::filter(k > quantile(k_se, probs = 0.95))
#> filter: removed 8,128 rows (20%), 33,084 rows remaining

# compare how the means and quantiles differ depending on this filtering
IVBG2_filter <- IVBG2 %>%
  tidylog::drop_na(k_se) %>%
  tidylog::filter(k_se < quantile(k_se, probs = 0.95)) %>%
  group_by()
#> drop_na: no rows removed
#> filter: removed 2,061 rows (5%), 39,151 rows remaining

# IVBG2_filter <- IVBG2 %>% 
#   tidylog::drop_na(k_se) %>% 
#   tidylog::filter(k < k_se) %>% 
#   group_by()

# add cohort and area attributes
d_red <- d[!duplicated(d[,"ID"]),names(d) %in% c("ID","cohort","area")]

IVBG2 <- IVBG2 %>% left_join(d_red,by="ID")
IVBG2_filter <- IVBG2_filter %>% left_join(d_red,by="ID")

# summarize growth coefficients by cohort and area
VBG <- IVBG2 %>%
  group_by(cohort,area) %>%
  summarize(k_mean=mean(k,na.rm=T),
            k_median=quantile(k,prob=0.5,na.rm=T),
            k_lwr=quantile(k,prob=0.05,na.rm=T),
            k_upr=quantile(k,prob=0.95,na.rm=T))
#> `summarise()` has grouped output by 'cohort'. You can override using the
#> `.groups` argument.

VBG_filter <- IVBG2_filter %>%
  group_by(cohort,area) %>%
  summarize(k_mean=mean(k,na.rm=T),
            k_median=quantile(k,prob=0.5,na.rm=T),
            k_lwr=quantile(k,prob=0.05,na.rm=T),
            k_upr=quantile(k,prob=0.95,na.rm=T))
#> `summarise()` has grouped output by 'cohort'. You can override using the
#> `.groups` argument.

ggplot() +
  geom_ribbon(data = VBG, aes(cohort, k_median, ymin = k_lwr, ymax = k_upr, fill = "All k's"), alpha = 0.4) +
  geom_ribbon(data = VBG_filter, aes(cohort, k_median, ymin = k_lwr, ymax = k_upr, fill = "Filtered"), alpha = 0.4) +
  geom_line(data = VBG, aes(cohort, k_median, color = "All k's")) + 
  geom_line(data = VBG_filter, aes(cohort, k_median, color = "Filtered")) + 
  guides(fill = FALSE) +
  facet_wrap(~area)
#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.


# No difference at all really. We should probably just discuss that with this model, achieving biologically reasonable parameter values and a good fit to data are sometimes two different things. In our case, we just want a representative value of the growth (as in time to reach average maximum size in the population) that accounts for the entire growth trajectory of an individual, for each area and cohort.
## using the fit_nls_multstart() instead works but takes a long time to fit:
# summarize(k=nls_out(fit_nls_multstart(length_mm,age_bc,min_nage=3,model="VBGF")))

# test<-unique(VBG$cohort[VBG$area=="TH"]);min(test);max(test)

## add number of samples
samplesize <- d %>%  group_by(cohort,area) %>%  summarise(n=n())
#> `summarise()` has grouped output by 'cohort'. You can override using the
#> `.groups` argument.
VBG <- VBG %>% left_join(samplesize,by=c("cohort","area"))

## add SST by year/area and order by overall mean SST across years 
VBG <- VBG %>%
  drop_na() %>%
  rename(year=cohort) %>%
  left_join(sst_areas_lags,by=c("area","year")) %>%
  left_join(sst_area_means,by=c("area")) %>%
  arrange(area,year) %>%
  ungroup() %>%
  mutate(area=fct_reorder(area,as.integer(mean_SST_allyrs)))

## scale (z-score) growth coefficients within each area
zscore <- function(x){ (x-mean(x,na.rm=TRUE))/sd(x,na.rm=TRUE) }
VBGz <- VBG %>%
  group_by(area) %>%
  mutate(k_mean=zscore(k_mean),k_median=zscore(k_median)) %>%
  ungroup() %>%
  mutate(area=fct_reorder(area,as.integer(mean_SST_allyrs)))

## colors for plots
colors<-rev(colorRampPalette(brewer.pal(name="RdYlBu",n=10))(nareas))

## plot scaled growth coefficients over time to look for coherence among areas
VBGz %>%
  ggplot(.,aes(year,k_mean,color=area,group=area)) +
  geom_line(size=0.6) +
  scale_color_brewer(palette="Paired") +
  theme(axis.text.x=element_text(angle=0)) +
  labs(x="Cohort",y="Scaled growth rate coefficient") +
  NULL


## plot median growth coefficients by year and area against mean SST
VBG %>%
  ggplot(.,aes(meanSST,k_median,color=area)) + 
  geom_point(size=1) + ## aes(size=n)
  stat_smooth(aes(meanSST,k_median,group=area),size=0.5,se=F,method="gam", formula=y~s(x,k=5)) +
  scale_color_manual(values=colors,name="Area") + ## (mean SST)
  theme(plot.title=element_text(size=15,face="bold")) +
  theme(axis.text.x=element_text(angle=0)) +
  theme(axis.text=element_text(size=12),axis.title=element_text(size=15)) +
  guides(color=guide_legend(override.aes=list(size=1))) +
  labs(x="Temperature (C)",y="Growth rate coefficient", title="Median annual growth coefficient vs. mean annual SST in each area") +
  facet_wrap(~area,scales="free") +
  NULL

Correlation between growth coefficients and temperature by area


## calculate correlations between mean annual growth coefficients and SST 
df_corr <- VBGz %>% 
  select(area,k_median,meanSST) %>%
  data.frame() %>%
  group_by(area) %>% 
  summarize(corr=cor(k_median,meanSST,use="pairwise.complete.obs")) %>%
  left_join(sst_area_means,by="area") %>%
  rename(mean_SST=mean_SST_allyrs)

## plot correlation as a function of the overall average SST by area
# df_corr %>%
#   ggplot(.,aes(mean_SST,corr,color=area)) +
#   geom_point(size=3) +
#   geom_smooth(method='lm',formula=y~as.numeric(x)) +
#   scale_color_brewer(palette="Paired") +
#   NULL

## gamm with SST effect and accounting for spatial correlation 
gamm_fit <- gamm(corr~s(mean_SST,k=3),data=df_corr,corr=corSpatial(form=~lon+lat,type='gaussian'))
plot(gamm_fit$gam,xlab="Long-term mean SST by area",ylab="Correlation growth rate coefficient vs SST")
abline(h=0,lwd=0.2)
points(df_corr$mean_SST,df_corr$corr,pch=21,lwd=0.1,bg="gray50")
text(df_corr$mean_SST,df_corr$corr,labels=df_corr$area,cex=0.5,pos=1)


## populations in 'colder' areas tend to respond positively to warming
## populations in 'warmer' areas tend to respond negatively to warming

## but we need higher resolution SST data for better area specific SSTs
## some areas are so close that they get the same 2x2 grid and hence SST

## Also, growth-temperature relationships are non-linear (see next section)

Non-linear Sharpe-Schoolfield model fit to growth coefficients


model <- 'sharpeschoolhigh_1981'

## get starting values on full dataset for Sharpe-Schoolfield model
dat <- VBG %>%
  select(k_median,meanSST) %>%
  rename(rate=k_median) %>%
  rename(temp=meanSST) %>%
  filter(!is.na(rate))
lower <- get_lower_lims(dat$temp,dat$rate,model_name=model)
upper <- get_upper_lims(dat$temp,dat$rate,model_name=model)
start <- get_start_vals(dat$temp,dat$rate,model_name=model)
  
## Sharpe-Schoolfield model fit to data for each area
preds <- NULL
for(a in 1:nareas) {
  ## get data
  dat <- VBG[VBG$area==area[a],] %>% 
    select(k_median,meanSST,area) %>% 
    rename(rate=k_median) %>%
    rename(temp=meanSST) %>%
    filter(!is.na(rate))
  ## fit model
  fit <- nls_multstart(
    rate~sharpeschoolhigh_1981(temp=temp,r_tref,e,eh,th,tref=8),
    data=dat,
    iter=c(3,3,3,3),
    start_lower=start*0.5,
    start_upper=start*2,
    lower=lower,
    upper=upper,
    supp_errors='Y'
    )
  ## make predictions on new data
  new_data <- data.frame(temp=seq(min(dat$temp),max(dat$temp),length.out=100))
  pred <- augment(fit,newdata=new_data) %>%
    mutate(area=area[a])
  ## add to general data frame
  preds <- data.frame(rbind(preds,pred))
}

## add mean SST across years by area for reordering
pred_nls_fits <- preds %>%
  left_join(sst_area_means,by=c("area")) %>%
  mutate(area=fct_reorder(area,as.integer(mean_SST_allyrs)))

## plot scaled median growth coefficients by year and area against mean SST
p1 <- pred_nls_fits %>%
  ggplot(.,aes(temp,.fitted,color=factor(area))) + 
  geom_point(aes(meanSST,k_median,color=factor(area)),VBG,size=0.6) + ## data
  geom_line(aes(temp,.fitted,group=factor(area)),size=1) +
  scale_color_manual(values=colors,name="Area") +
  theme(plot.title=element_text(size=15,face="bold")) +
  theme(axis.text.x=element_text(angle=0)) +
  theme(axis.text=element_text(size=12),axis.title=element_text(size=15)) +
  guides(color=guide_legend(override.aes=list(size=1))) +
  scale_x_continuous(breaks=seq(-5,20,1)) +
  labs(x="Temperature (C)",y="Growth rate coefficient", title="Median annual growth coefficient vs mean annual SST by area with Sharpe-Schoolfield fit") +
  # facet_wrap(~area,scale="free_y") +
  NULL

p1

gam_pred_temp <- read_csv("output/gam_predicted_temps.csv") %>%
  drop_na(pred) %>% 
  filter(growth_dat == "Y" & yday %in% c(yday("2000-03-01"):yday("2000-10-01")) & source == "logger") %>% 
  group_by(area, year) %>% 
  summarise(temp = mean(pred))
#> New names:
#> Rows: 1093608 Columns: 8
#> ── Column specification
#> ──────────────────────────────────────────────────────── Delimiter: "," chr
#> (3): area, source, growth_dat dbl (5): ...1, yday, year, pred, first_cohort
#> â„č Use `spec()` to retrieve the full column specification for this data. â„č
#> Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> `summarise()` has grouped output by 'area'. You can override using the
#> `.groups` argument.
#> ‱ `` -> `...1`

## get starting values on full dataset for Sharpe-Schoolfield model
dat <- VBG %>%
  left_join(gam_pred_temp, by = c("area", "year")) %>% 
  select(k_median,temp) %>%
  rename(rate=k_median) %>%
  filter(!is.na(rate))
lower <- get_lower_lims(dat$temp,dat$rate,model_name=model)
lower[4] <- 1 # above func returns na...
upper <- get_upper_lims(dat$temp,dat$rate,model_name=model)
upper[4] <- 10 # above func returns na...
start <- get_start_vals(dat$temp,dat$rate,model_name=model)
start[4] <- 8 # above func returns na...

## Sharpe-Schoolfield model fit to data for each area
preds <- NULL
for(a in 1:nareas) {
  ## get data
  gam_pred_temp2 <- gam_pred_temp[gam_pred_temp$area==area[a],] %>% ungroup() %>% dplyr::select(-area)
  
  dat <- VBG[VBG$area==area[a],] %>% 
    left_join(gam_pred_temp2, by = "year") %>% 
    select(k_median,temp,area) %>% 
    rename(rate=k_median) %>%
    #rename(temp=meanSST) %>%
    filter(!is.na(rate))
  ## fit model
  fit <- nls_multstart(
    rate~sharpeschoolhigh_1981(temp=temp,r_tref,e,eh,th,tref=8),
    data=dat,
    iter=c(3,3,3,3),
    start_lower=start*0.5,
    start_upper=start*2,
    lower=lower,
    upper=upper,
    supp_errors='Y'
    )
  ## make predictions on new data
  new_data <- data.frame(temp=seq(min(dat$temp, na.rm = TRUE),max(dat$temp, na.rm = TRUE),length.out=100))
  pred <- augment(fit,newdata=new_data) %>%
    mutate(area=area[a])
  ## add to general data frame
  preds <- data.frame(rbind(preds,pred))
}

## add mean SST across years by area for reordering
pred_nls_fits <- preds %>%
  left_join(gam_pred_temp %>% group_by(area) %>% summarise(mean_SST_allyrs = mean(temp)),by=c("area")) %>%
  mutate(area=fct_reorder(area,as.integer(mean_SST_allyrs)))

## plot scaled median growth coefficients by year and area against mean SST
VBG2 <- VBG %>% left_join(gam_pred_temp, by = c("area", "year"))

p2 <- pred_nls_fits %>%
  ggplot(.,aes(temp,.fitted,color=factor(area))) + 
  geom_point(aes(temp,k_median,color=factor(area)),VBG2,size=0.6) + ## data
  geom_line(aes(temp,.fitted,group=factor(area)),size=1) +
  scale_color_manual(values=colors,name="Area") +
  theme(plot.title=element_text(size=15,face="bold")) +
  theme(axis.text.x=element_text(angle=0)) +
  theme(axis.text=element_text(size=12),axis.title=element_text(size=15)) +
  guides(color=guide_legend(override.aes=list(size=1))) +
  scale_x_continuous(breaks=seq(-5,20,1)) +
  labs(x="Temperature (C)",y="Growth rate coefficient", title="Median annual growth coefficient vs mean annual SST by area with Sharpe-Schoolfield fit") +
  # facet_wrap(~area,scale="free_y") +
  NULL

# Remove warm-water pollution areas
p3 <- pred_nls_fits %>%
  filter(!area %in% c("BT", "SI_EK", "SI_HA")) %>% 
  ggplot(.,aes(temp,.fitted,color=factor(area))) + 
  geom_point(aes(temp,k_median,color=factor(area)),VBG2 %>% filter(!area %in% c("BT", "SI_EK", "SI_HA")),size=0.6) + ## data
  geom_line(aes(temp,.fitted,group=factor(area)),size=1) +
  scale_color_manual(values=colors,name="Area") +
  theme(plot.title=element_text(size=15,face="bold")) +
  theme(axis.text.x=element_text(angle=0)) +
  theme(axis.text=element_text(size=12),axis.title=element_text(size=15)) +
  guides(color=guide_legend(override.aes=list(size=1))) +
  scale_x_continuous(breaks=seq(-5,20,1)) +
  labs(x="Temperature (C)",y="Growth rate coefficient", title="Median annual growth coefficient vs mean annual SST by area with Sharpe-Schoolfield fit") +
  # facet_wrap(~area,scale="free_y") +
  NULL

p1 / p2 / p3
#> Warning: Removed 1 rows containing missing values (geom_point).
#> Removed 1 rows containing missing values (geom_point).


# Remove lines and area specific trends and just plot the whole things as one data set (polynomial fit)
pred_nls_fits %>%
  ggplot(.,aes(temp,.fitted,color=factor(area))) + 
  geom_point(aes(temp,k_median,color=factor(area)),VBG2,size=0.6) + ## data
  #geom_line(aes(temp,.fitted,group=factor(area)),size=1) +
  stat_smooth(aes(temp,k_median),VBG2, method = lm, formula = y~poly(x, 2), inherit.aes = FALSE) +
  scale_color_manual(values=colors,name="Area") +
  theme(plot.title=element_text(size=15,face="bold")) +
  theme(axis.text.x=element_text(angle=0)) +
  theme(axis.text=element_text(size=12),axis.title=element_text(size=15)) +
  guides(color=guide_legend(override.aes=list(size=1))) +
  scale_x_continuous(breaks=seq(-5,20,1)) +
  labs(x="Temperature (C)",y="Growth rate coefficient", title="Median annual growth coefficient vs mean annual SST by area with Sharpe-Schoolfield fit") +
  # facet_wrap(~area,scale="free_y") +
  NULL
#> Warning: Removed 1 rows containing non-finite values (stat_smooth).
#> Removed 1 rows containing missing values (geom_point).